from urllib import request
from urllib.parse import urljoin

import ktool
import pandas as pd
import requests
from lxml import etree

# Page to download and scrape.
url = 'https://www.runoob.com/html/html-tutorial.html'


# Identify as mobile Chrome on Android; presumably the site is expected to
# answer browser-like clients -- confirm whether a specific UA is required.
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36'
}

# requests has no default timeout, so without one a stalled server would
# block the script forever.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page and exporting an
# empty spreadsheet.
response.raise_for_status()
text = response.text
# Select every <a> that is a direct child of a <div class="design">.
zones = ktool.xpath.xpath_all(text, "//div[@class='design']/a")
a = []  # absolute link URLs, one entry per kept anchor
b = []  # link titles, index-aligned with `a`
for zone in zones:
    hrefs = zone.xpath('./@href')
    if not hrefs:
        # An anchor without an href has no URL to record; skipping it whole
        # keeps `a` and `b` the same length for the DataFrame built later.
        continue
    # urljoin resolves root-relative, page-relative and already-absolute
    # hrefs against the page URL; plain concatenation only handled the first.
    a.append(urljoin(url, hrefs[0]))
    # Collapse the anchor's direct text nodes into exactly one title so each
    # URL gets exactly one title (assumes xpath_all yields strings here, as
    # the original code called .strip() on each result).
    titles = ktool.xpath.xpath_all(zone, './text()')
    pieces = [title.strip() for title in titles]
    b.append(' '.join(piece for piece in pieces if piece))
print(a)

# Build the DataFrame: gather the scraped columns into a mapping first so it
# can be printed for inspection.
dic = {
    'url': a,
    'title': b,
}
print(dic)

# Show a summary of the frame, then export it to an Excel workbook without
# the index column.
data = pd.DataFrame(data=dic)
data.info()
data.to_excel(excel_writer='model6.xlsx', index=False)
